Vamos a estudiar el efecto del volumen escaneado pero ahora en vez de usar el rango como variable que representa ese efecto vamos a evaluar el alto del volumen (perpendicular al suelo).
radial_wind <- ReadNetCDF("../VAD_datos/RMA4/casos/cfrad.20181121_105940.0000_to_20181121_110221.0000_RMA4_0200_02.nc", vars = c("Vda", "azimuth", "elevation")) %>%
setDT()
VAD <- with(radial_wind, vad_fit(Vda, azimuth, range, elevation, r2 = 0.8, outlier_threshold = 2.5)) %>%
setDT()
VAD %>%
.[, spd := sqrt(u^2 + v^2)] %>%
.[, `:=`(elev_inf = elevation - 0.5,
elev_sup = elevation + 0.5,
range_ini = range - 75,
range_fin = range + 75)] %>%
.[, `:=`(hg_inf = beam_propagation(range_ini, elev_inf)[["ht"]],
hg_sup = beam_propagation(range_fin, elev_sup)[["ht"]])] %>%
.[, dz := hg_sup - hg_inf]
VAD[, height_cut := cut_width(height, 200)] %>%
na.omit() %>%
ggplot(aes(spd, dz)) +
geom_point(aes(color = factor(elevation))) +
geom_smooth(method = lm) +
scale_color_discrete(name = "Elevación") +
facet_wrap(~height_cut, scales = "free")
na.omit(VAD) %>%
.[, FitLm(spd, dz, se = TRUE), by = .(elevation, cut_width(height, 200))] %>%
.[term == "dz"] %>%
ggplot(aes(estimate, cut_width)) +
geom_vline(xintercept = 0, color = "darkgray") +
geom_point(aes(color = factor(elevation), size = df)) +
geom_path(aes(group = factor(elevation), color = factor(elevation))) +
scale_color_discrete(name = "Elevación") +
scale_size(name = "Grados de \nlibertad") +
labs(title = "Relación entre la velocidad y la distancia", x = "Pendiente (m/s*km)", y = "Capas (m)") +
# geom_errorbarh(aes(xmin = estimate - std.error*2, xmax = estimate + std.error*2)) +
theme(legend.position = "bottom")
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_path).
na.omit(VAD) %>%
.[, FitLm(spd, dz, se = TRUE), by = .(cut_width(height, 200))] %>%
.[term == "dz"] %>%
ggplot(aes(estimate, cut_width)) +
geom_vline(xintercept = 0, color = "darkgray") +
geom_point(aes(size = df)) +
geom_path() +
scale_size(name = "Grados de \nlibertad") +
labs(title = "Relación entre la velocidad y la distancia", x = "Pendiente (m/s*km)", y = "Capas (m)") +
# geom_errorbarh(aes(xmin = estimate - std.error*2, xmax = estimate + std.error*2)) +
theme(legend.position = "bottom")
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## Warning in L$marker$color[idx] <- aes2plotly(data, params, "fill")[idx]:
## number of items to replace is not a multiple of replacement length
ring_fit2 <- function (ring, azimuth, elev, outlier_threshold = Inf)
{
nas <- is.na(ring)
if (sum(nas) == length(ring)) {
return(list(u = NA_real_, v = NA_real_, r2 = NA_real_,
rmse = NA_real_))
}
n_outliers <- 1
while (n_outliers > 0) {
fit <- stats::lm.fit(cbind(1, cos(azimuth * pi/180),
sin(azimuth * pi/180))[!nas, , drop = FALSE], ring[!nas])
rmse <- stats::sd(fit$residuals)
outliers <- abs(fit$residuals) >= outlier_threshold *
rmse
n_outliers <- sum(outliers)
# browser()
ring[!nas][outliers] <- NA
# ring[!nas][outliers] <- fit$fitted.values[outliers]
# ring <- rvad:::ring_qc(ring, azimuth, max_na = 0.4)
nas <- is.na(ring)
if (sum(nas) == (length(ring) - 3) ) {
return(list(u = NA_real_, v = NA_real_, r2 = NA_real_,
rmse = NA_real_))
}
}
r2 <- 1 - stats::var(fit$residuals)/stats::var(ring[!nas])
coef_cos <- fit$coefficients[2]
coef_sin <- fit$coefficients[3]
v <- coef_cos/cos(elev * pi/180)
u <- coef_sin/cos(elev * pi/180)
# list(azimuth, fit$fitted.values, fit$residuals)
return(list(u = u, v = v, r2 = r2, rmse = rmse))
}
radial_wind[range %in% c(6240, 6390) & elevation == unique(elevation)[4]] %>%
ggplot(aes(azimuth, Vda)) +
geom_line(aes(color = factor(range))) +
# geom_smooth(aes(color = factor(range)), span = 0.5) +
labs(title = "VAD para dos anillos particularmente molestos",
subtitle = "Elevación = 4.57º",
color = "Rango")
radial_wind[range %in% c(6240, 6390) & elevation == unique(elevation)[4]] %>%
.[, ring_fit2(Vda, azimuth, elevation[1], outlier_threshold = 2), by = .(range)] %>%
.[, V := metR::Mag(u, v)] %>%
.[]
## range u v r2 rmse V
## 1: 6240 -3.873376 -11.58958 0.9958829 0.5382776 12.21971
## 2: 6390 -4.105933 -11.36880 0.9958341 0.5287766 12.08753
## Warning: Removed 677 rows containing missing values (geom_path).
## Warning: Removed 695 rows containing missing values (geom_point).
La función test_VAD() compara el perfil calculado con VAD con el sondeo a la misma hora y devuelve el rmse y bias para la magnitud y dirección de viento junto con la fracción de puntos que no son NA.
parametros <- CJ(max_na = seq(0.1, 0.3, 0.1),
max_consecutive_na = 30,
r2_min = seq(0.7, 0.9, 0.05),
outlier_threshold = c(Inf, 2, 2.5, 3))
En cada figura se incluye la variación de los 3 parámetros más importantes:
max_na en el eje xr2_min en el eje youtlier_threshold en cada subplot.Las 4 figuras correpsonden a las cuatro métricas: rmse y bias para la magnitud y para la dirección del viento.
Algunas conclusiones preliminares:
outlier_threshold es muy exigente, osea 2 o 2.5; el r2_min deja de tener tanta importancia porque los anillos que quedan luego de interar sobre el fit hasta conseguir “el mejor fit posible”. Esto es particularmente cierto cuando el max_na también es muy exigente. El problema es que el perfil nos queda con muy pocos puntos (en comparación con el sondeo).max_na = 0.2max_consecutive_na = 30r2_min = 0.8# files <- list.files("../VAD_datos/RMA4/casos/", pattern = ".nc", full.names = TRUE)
#
# radial_wind <- purrr::map(files, function(f) {
# ReadNetCDF(f, vars = c("Vda", "azimuth", "elevation")) %>%
# .[, real_time := time[1]]
# }) %>%
# rbindlist()
#
# parametros <- CJ(max_na = seq(0.1, 0.3, 0.1),
# max_consecutive_na = 30,
# r2_min = seq(0.7, 0.9, 0.05),
# outlier_threshold = c(Inf, 2, 2.5, 3),
# fecha_real = unique(radial_wind$real_time))
# metricas_VAD <- parametros[, {print(paste(max_na, r2_min, outlier_threshold, fecha_real))
# test_VAD(fecha_real, with(radial_wind[real_time == fecha_real], vad_fit(Vda, azimuth, range, elevation, max_na = max_na,
# max_consecutive_na = max_consecutive_na, r2_min = r2_min,
# outlier_threshold = outlier_threshold)),
# soundings)}, by = c(colnames(parametros))]
#
# fwrite(metricas_VAD, "metricas.csv")
metricas_VAD <- fread("metricas.csv") %>%
.[, fecha_real := ymd_hms(fecha_real)]
A partir del gráfico (promedia sobre todos los casos el rmse_spd y el frac_n) que sigue decido usar:
- `max_na = 0.3`
- `max_consecutive_na = 30`
- `r2_min = 0.8`
- `outlier_threshold = 2.5`
Para el ciclo diario y vemos que pasar.
metricas_VAD[, .(mean_rmse_spd = mean(rmse_spd, na.rm = TRUE),
mean_bias_spd = mean(bias_spd, na.rm = TRUE),
mean_frac_n = mean(frac_n, na.rm = TRUE)), by = .(max_na, max_consecutive_na, r2_min, outlier_threshold)] %>%
ggplot(aes(max_na, r2_min)) +
geom_raster(aes(fill = mean_rmse_spd)) +
scale_fill_viridis_c(direction = -1) +
geom_text(aes(label = round(mean_frac_n, 2))) +
facet_wrap(~factor(outlier_threshold)) +
theme_minimal()
metricas_VAD[order(rmse_spd, -frac_n, na.last = TRUE)]
## max_na max_consecutive_na r2_min outlier_threshold
## 1: 0.3 30 0.80 3
## 2: 0.2 30 0.75 Inf
## 3: 0.1 30 0.70 2
## 4: 0.1 30 0.75 2
## 5: 0.1 30 0.80 2
## ---
## 1796: 0.3 30 0.85 Inf
## 1797: 0.3 30 0.90 Inf
## 1798: 0.3 30 0.90 Inf
## 1799: 0.3 30 0.90 Inf
## 1800: 0.3 30 0.90 Inf
## fecha_real rmse_spd bias_spd rmse_dir bias_dir
## 1: 2018-11-24 10:57:40 0.01528460 -2.1317441 1.481041 -0.5555837
## 2: 2018-11-24 11:06:27 0.02049263 -0.8947237 3.183275 -3.1365717
## 3: 2018-12-10 11:10:39 0.04035358 0.5261815 4.188472 3.6958122
## 4: 2018-12-10 11:10:39 0.04035358 0.5261815 4.188472 3.6958122
## 5: 2018-12-10 11:10:39 0.04035358 0.5261815 4.188472 3.6958122
## ---
## 1796: 2018-11-02 11:00:46 NA NA NA NA
## 1797: 2018-11-02 11:00:46 NA NA NA NA
## 1798: 2018-11-02 11:09:38 NA NA NA NA
## 1799: 2018-11-25 11:11:44 NA NA NA NA
## 1800: 2018-12-13 10:58:02 NA NA NA NA
## frac_n
## 1: 0.2500000
## 2: 0.2500000
## 3: 0.1666667
## 4: 0.1666667
## 5: 0.1666667
## ---
## 1796: 0.0000000
## 1797: 0.0000000
## 1798: 0.0000000
## 1799: 0.0000000
## 1800: 0.0000000
metricas_VAD %>%
ggplot(aes(rmse_spd, outlier_threshold)) +
geom_point()
## Warning: Removed 169 rows containing missing values (geom_point).